* Session settings so the script can run unattended.
* Do not pause output with --more-- prompts.
set more off 
* Ignore any -pause- commands encountered while running.
pause off
* Write logs as plain text.
set logtype text
* NOTE(review): -set mem- is believed to be ignored by Stata 12 and later
* (memory is managed automatically), and -import delimited- below needs a
* newer Stata anyway, so this line is probably a harmless leftover -- confirm.
set mem 500M

*************** DESCRIPTION ******************************************************
* Loads concentration data from the census. 
*
* The raw data come in different formats, so each source is loaded with its 
* own procedure. 
*
* The data are aggregated to the desired level of granularity in a third file 
* (main_concen_datawork.do), which incorporates SIC-based concentration ratios. 
* 
* Note, however, that data for 1992 and earlier are only available at the SIC-4 
* level of granularity, which cannot be mapped to NAICS. 
*
**********************************************************************************


*******************		DATA LOADING 		*********************

/*------------------*/
/*	 	NON-MFG     */
/*------------------*/

*** 2002 - 2012 (1997 is loaded separately below) *** 

* Import every year x sector extract and stack the results.
* temp.dta is the accumulator: each newly imported file is appended to what
* has been saved so far, and the combined data are written back to temp.dta.
local census_years 2002 2007 2012 
local sectors      42 44 48 51 52 53 54 56 61 62 71 72 81 

foreach yr of local census_years {
	foreach sec of local sectors {

		* Year and sector glued together, e.g. "200242"; used for the
		* file-specific special cases below.
		local yrsec "`yr'`sec'"

		* Variable names come from row 1; data are read from row 3 onwards.
		import delimited 0.raw_inputs\Census_conc_`yr'\ECN_`yr'_US_`sec'SSSZ6_with_ann.csv, varnames(1) rowrange(3) clear 

		******* Adjustments due to unique data formatting: *******
		if inlist("`sec'", "54", "61", "62", "71", "81") {
			* keep only the rows flagged "T" in optaxid
			keep if optaxid == "T" 
		}
		else if "`yrsec'" == "200242" {
			* keep only the rows flagged "00" in optaxid
			keep if optaxid == "00" 
		}
		else if "`yrsec'" == "200748" {
			* strip the "(101)" marker embedded in the NAICS code
			replace naicsid = subinstr(naicsid, "(101)", "", .) 
		}
		else if "`yrsec'" == "200248" {
			* strip the "(001)" marker embedded in the NAICS code
			replace naicsid = subinstr(naicsid, "(001)", "", .) 
		}
		***********************************************************

		* Outside 2002 the share column is named val_pct; align it with 2002.
		if `yr' != 2002 {
			rename val_pct valpct
		}

		* Harmonise to the common layout shared by all loaders.
		keep naicsid concenfidi yearid rcptot valpct 
		rename (yearid concenfidi valpct) (year concen pct_sales)

		* The very first file (2002, sector 42) starts the accumulator;
		* every later file is appended to it.
		if "`yrsec'" != "200242" {
			compress
			append using temp 
		}

		save temp, replace
		* Pause one second between files.
		* NOTE(review): presumably to let the file system release temp.dta -- confirm.
		sleep 1000
	}
}

* Convert to numeric; with -force-, non-numeric entries become missing.
destring rcptot pct_sales year, replace force
save nonmfg_post97, replace


***

* 1997 Non-manufacturing
* Imports the pipe-delimited 1997 extract for every sector, harmonises the
* variable names to the layout used by the other loaders (naicsid, concen,
* year, rcptot, pct_sales) and stacks the sectors in temp.dta.
local mylist2 42 44 48 51 52 53 54 56 61 62 71 72 81 
foreach jj of local mylist2 {

	import delimited 0.raw_inputs\Census_conc_1997\NonMfg_data\E97`jj'S6.dat, delimiter("|") clear

	******* Adjustments due to unique data formatting: *******
	if "`jj'" == "54" | "`jj'" == "61" | "`jj'" == "62" | "`jj'" == "71" | "`jj'" == "81"{
		* keep only the rows flagged "T" in taxind
		drop if taxind ~= "T" 
	}
	else if "`jj'" == "42" {
		* keep only the rows with optype equal to 0
		drop if optype ~= 0 
	}
	***********************************************************

	* match formatting
	* Every variable is referred to by its full name so the block does not
	* depend on variable-name abbreviation being switched on.
	keep naics concenfi_m year ecvalue valpct 
	rename naics      naicsid
	rename concenfi_m concen
	rename ecvalue    rcptot
	rename valpct     pct_sales
	* NAICS codes are stored as strings to match the other loaders.
	tostring naicsid, replace force

	* Sector 42 is the first file and starts the accumulator; every later
	* sector is appended to what has been saved so far.
	if "`jj'" ~= "42" {
		compress
		append using temp 
	}
	
	save temp, replace
	* Pause one second between files.
	* NOTE(review): presumably to let the file system release temp.dta -- confirm.
	sleep 1000	
}

save nonmfg_97, replace


****



/*------------------*/
/*	 MANUFACTURING  */
/*------------------*/

* Manufacturing (NAICS 31-33) is provided in a different file layout, which is
* loaded here as well. Unlike the non-manufacturing files, this one includes
* the vsherfi column, kept below as hhi.
local mfg_years 2002 2007 2012
foreach yr of local mfg_years {

	* Variable names come from row 1; data are read from row 3 onwards.
	import delimited 0.raw_inputs\Census_conc_`yr'\ECN_`yr'_US_31SR12_with_ann.csv, varnames(1) rowrange(3) clear 

	* Harmonise to the common layout shared by all loaders.
	keep naicsid concenfidi yearid rcptot ccorcppct vsherfi
	rename (concenfidi vsherfi yearid ccorcppct) (concen hhi year pct_sales)

	* 2002 is the first file and starts the accumulator; later years are
	* appended to what has been saved so far.
	if "`yr'" != "2002" {
		compress
		append using temp 
	}

	save temp, replace
	* Short pause between files.
	* NOTE(review): presumably to let the file system release temp.dta -- confirm.
	sleep 700
}

* Convert to numeric; with -force-, non-numeric entries become missing.
destring hhi rcptot pct_sales year, replace force

save mfg_post97, replace


**

* 1997 manufacturing
* This file is wide: one row per industry with the top-4/8/20/50 shares in
* separate columns (vstop4 ... vstop50). It is reshaped to one row per
* industry x firm group so that it matches the layout of the other loaders.

import delimited 0.raw_inputs\Census_conc_1997\Mfg\E9731R2_data.txt, delimiter("|") varnames(1) clear 
keep naics year ecvalue vstop4 vstop8 vstop20 vstop50 vsherfi 
rename vsherfi hhi 

* wide -> long: nfirms takes the numeric suffix of vstop (4, 8, 20 or 50)
reshape long vstop, i(naics year ecvalue hhi) j(nfirms)

* Text label for the firm group, e.g. "4 largest firms".
g concen = string(nfirms) + " largest firms" if inlist(nfirms, 4, 8, 20, 50)

* Harmonise names and storage types.
rename (naics ecvalue vstop) (naicsid rcptot pct_sales)
tostring naicsid, replace
destring year, replace

drop nfirms
compress
save mfg_97, replace






***




*******************		CONSOLIDATE INPUTS 	*********************

* Stack the four intermediate files, fill HHI and sales within each
* industry-year, and reshape to one row per industry-year with the top-4/8/20/50
* concentration ratios in separate columns (a1_cenconc4 ... a1_cenconc50).
* All variables are referred to by their full names so the block does not
* depend on variable-name abbreviation being switched on.

* append files
use nonmfg_post97, clear
append using nonmfg_97
append using mfg_post97
append using mfg_97

* Make sure hhi and sales are numeric BEFORE they are compared with numbers.
* With -force-, non-numeric entries become missing.
destring hhi, replace force
rename rcptot sales
destring sales, replace force

* Zeros are treated as missing values.
replace pct_sales = . if pct_sales == 0
replace hhi = . if hhi == 0

* fill in hhi and sales
* hhi: rows with a missing value take the largest value found in the same
* industry-year.
egen hhi_fill = max(hhi), by(naicsid year)
replace hhi = hhi_fill if hhi == . 
drop hhi_fill

* sales: every row takes the largest value found in the same industry-year,
* which makes sales constant within industry-year (needed by the reshape).
egen sales_max = max(sales), by(naicsid year)
replace sales = sales_max
drop sales_max

* create nfirms
* Map the text label of the firm group to its size; both the "firms" and the
* "companies" wording occur in the labels.
g nfirms = .
foreach n in 4 8 20 50 {
	replace nfirms = `n' if concen == "`n' largest firms" | concen == "`n' largest companies"
}
drop if concen == "All firms" | concen == "All companies"

* Reshape to panel structure
keep nfirms year pct_sales sales hhi naicsid
rename pct_sales a1_cenconc
reshape wide a1_cenconc hhi, i(naicsid year) j(nfirms)

* Only the copy of hhi attached to the top-4 row is kept.
rename hhi4 hhi
drop hhi8 hhi20 hhi50

* If a smaller group already accounts for 100 percent, a missing ratio for the
* next larger group is set to 100 as well.
replace a1_cenconc8 = 100 if a1_cenconc4 == 100 & a1_cenconc8 == .  
replace a1_cenconc20 = 100 if a1_cenconc8 == 100 & a1_cenconc20 == .  
replace a1_cenconc50 = 100 if a1_cenconc20 == 100 & a1_cenconc50 == .  

saveold 2.intermediate\CenCon_naics, replace

* Remove the intermediate files created by the loading steps.
erase temp.dta
erase nonmfg_post97.dta
erase nonmfg_97.dta
erase mfg_post97.dta
erase mfg_97.dta


